

import pandas as pd
import numpy as np
from pandasql import sqldf
from datetime import datetime
from dateutil import parser
import statsmodels.api as sm
import sqlite3
from dateutil.relativedelta import relativedelta


DB_alldata = sqlite3.connect('../Database/alldata.db')
bomb_1 = pd.read_sql_query("select * from bomb_1", con = DB_alldata).set_index('usid')
lca_1 = pd.read_sql_query("select * from lca_1",con = DB_alldata).set_index('usid')
score_1 = pd.read_sql_query("select * from HES",con = DB_alldata).set_index('usid')
DB_alldata.close()

bomb_1.qdate = bomb_1.qdate.apply(parser.parse)
lca_1.qdate = lca_1.qdate.apply(parser.parse)
score_1.date = score_1.date.apply(parser.parse)

def generatedata(score_date):
    bomb_date = score_date + relativedelta(months=1)
    lca_date = score_date + relativedelta(months=4)
    score = score_1.loc[(score_1.date>=score_date)&(score_1.date < bomb_date), :].iloc[:,[*range(1,20),*range(32,45)]] # select 19 scores and usid
    bomb = bomb_1.loc[bomb_1.qdate == bomb_date, 'fr_strikes_mean']
    lca = lca_1.loc[lca_1.qdate == lca_date, :].iloc[:,-6:]
    dt1 = score.join(bomb, how='inner').join(lca, how='inner')
    dt1.iloc[:,-6:] = dt1.iloc[:,-6:] - 1
    # create a database to save
    DB = sqlite3.connect('../Database/analysisdata.db')
    cursor = DB.cursor()
    dt1.to_sql(score_date.strftime('Data%Y_%m'), con = DB, if_exists = 'replace')
    DB.commit()
    cursor.close()

for score_date in [parser.parse('1969-09-01')+relativedelta(months=months) for months in range(0,39,3)]:
    generatedata(score_date)


# score_date between 69-7 and 72-7
# score_date = parser.parse('1969-07-01')
# bomb_date = score_date + relativedelta(months=3)
# lca_date = score_date + relativedelta(months=6)


